*	************************************************************************
* 	File-Name: 		CPS_AppendixC_balancetests.do
*
*	Data Used:  	trump_polls_aug12.dta version; JHU covid data; YouGov; Monmouth polls
*					fivethirtyeight; RMG polls; policy data from Raifman et al. (2020)
*
*	Purpose:   		.do file replicating the balance tests reported in Appendix C
*	
*	************************************************************************

****************************************************************************
**Balance tests on High v. Low Education
****************************************************************************

* User-written dependencies.
* The tables below are built with -outreg- and -frmttable-, which are
* distributed together in the SSC package "outreg"; install it when it is not
* already on the adopath so the script runs on a clean machine.
capture which outreg
if _rc ssc install outreg

* ietoolkit was installed by the original script; no command from it is called
* in the code visible in this file. Kept (guarded) for backward compatibility.
capture which iebaltab
if _rc ssc install ietoolkit

clear all

* Data directory. Set the global CPS_DATA_DIR before running this file to
* point at your own copy of the data; the original author's location is used
* as the default so existing setups keep working.
if `"$CPS_DATA_DIR"' == "" {
    global CPS_DATA_DIR "/Users/alicezxu/Dropbox/Trump Polls/CPS Replication/Replication_final/Data/"
}
cd "$CPS_DATA_DIR"
use CPS_trumppolls_final, clear

* Weeks elapsed between the first COVID-19 case date and the poll start date,
* rounded to the nearest whole week. Only generated when the variable is not
* already present in the dataset.
capture confirm variable weeks_since_firstcase, exact
if _rc {
    gen weeks_since_firstcase = round(((start_date - first_covid_date) / 7), 1)
}
label var weeks_since_firstcase "Weeks Since First COVID-19 Case"

* Covariates whose balance is tested (one table row per variable).
global DESCVARS weeks_since_firstcase after_business after_k12 after_emergency after_lock after_mask cumulative_cases Trump_pct_2016 popestimate2019 share_female share_black share_hispanic share_asian_hawaii_pacific mean_age gdp_percapita_thousands unemployment_rate
* Presumably clears any outreg/frmttable tables left in Mata by an earlier run.
mata: mata clear

***High vs. low educ
* Split observations at the sample median of colgrad_perc.
* Stored as double so the comparison below is not affected by float rounding.
quietly: sum colgrad_perc, detail
gen double median_colgrad = r(p50)

* Stata treats missing as larger than any number, so without the
* !missing() guard observations with missing colgrad_perc would be coded as
* "high". They are left missing in both indicators instead.
gen highcolgrad = (colgrad_perc >= median_colgrad) if !missing(colgrad_perc)
gen lowcolgrad  = (colgrad_perc <  median_colgrad) if !missing(colgrad_perc)

**Stateweek ID for clustering:
* A numeric group ID is used rather than concatenating the state and week
* codes as strings: without a separator, e.g. state 1 / week 12 and
* state 11 / week 2 would both become "112" and be merged into one cluster.
* The -missing- option keeps observations with a missing week in the sample
* (grouped by state), as the string version did.
* NOTE(review): week() is the week of the year, so the same week number in
* different calendar years shares an ID -- confirm the data span one year.
encode state, gen(nstate)
gen week = week(start_date)
egen state_week = group(nstate week), missing

* Difference in means between the two education groups.
* Each covariate is regressed on the high-education indicator with standard
* errors clustered by state-week; the coefficient is stored as a one-row
* table (row1, row2, ...) and appended to the running table "diff".
local rownum 0
foreach covariate of global DESCVARS {
    local ++rownum
    local rowlabel : variable label `covariate'

    regress `covariate' highcolgrad, vce(cluster state_week)

    outreg, keep(highcolgrad)  rtitle("`rowlabel'") stats(b) ///
        noautosumm store(row`rownum')  starlevels(10 5 1) starloc(1)
    outreg, replay(diff) append(row`rownum') ctitles("",Difference ) ///
        store(diff) note("")
}
* Display the assembled column of differences.
outreg, replay(diff)


* Then Summary statistics
* sumstat has one row per covariate and six columns:
*   1-3 = N, mean, sd for highcolgrad == 0
*   4-6 = N, mean, sd for highcolgrad == 1
local count: word count $DESCVARS
matrix sumstat = J(`count', 6, .)

local i = 1
foreach var of global DESCVARS {
    quietly summarize `var' if highcolgrad == 0
    matrix sumstat[`i', 1] = r(N)
    matrix sumstat[`i', 2] = r(mean)
    matrix sumstat[`i', 3] = r(sd)

    quietly summarize `var' if highcolgrad == 1
    matrix sumstat[`i', 4] = r(N)
    matrix sumstat[`i', 5] = r(mean)
    matrix sumstat[`i', 6] = r(sd)

    local ++i
}
* Store the matrix as a formatted table so it can be merged with "diff".
frmttable, statmat(sumstat) store(sumstat) sfmt(g,f,f,g,f,f)

* Output directory. Set the global CPS_OUTPUT_DIR before running this file to
* write the table elsewhere; the original author's location is the default.
if `"$CPS_OUTPUT_DIR"' == "" {
    global CPS_OUTPUT_DIR "/Users/alicezxu/Dropbox/Trump Polls/CPS Replication/Replication File/"
}
cd "$CPS_OUTPUT_DIR"

* Merge the summary statistics with the differences column and write the table.
* NOTE(review): the -tex- option is specified while the file name ends in
* .csv -- confirm the intended output format / extension.
outreg using "covidtrump_balancetest_colgrad.csv", ///
    replay(sumstat) merge(diff) tex nocenter note("") fragment plain replace ///
    ctitles("", Control, "", "", Treatment, "", "", "" \ "", n, mean, sd, n, mean, sd, Diff) ///
    multicol(1,2,3;1,5,3) 
	
****************************************************************************
**Balance tests on High v. Low Non-Teleworkability
****************************************************************************	

clear all

* Data directory. Set the global CPS_DATA_DIR before running this file to
* point at your own copy of the data; the original author's location is used
* as the default so existing setups keep working.
if `"$CPS_DATA_DIR"' == "" {
    global CPS_DATA_DIR "/Users/alicezxu/Dropbox/Trump Polls/CPS Replication/Replication_final/Data/"
}
cd "$CPS_DATA_DIR"
use CPS_trumppolls_final, clear

* The dataset has just been reloaded, so weeks_since_firstcase (listed in
* DESCVARS below) has to be rebuilt here; otherwise the first regression of
* this section stops with "variable weeks_since_firstcase not found".
* Weeks elapsed between the first COVID-19 case date and the poll start date,
* rounded to the nearest whole week.
capture confirm variable weeks_since_firstcase, exact
if _rc {
    gen weeks_since_firstcase = round(((start_date - first_covid_date) / 7), 1)
}
label var weeks_since_firstcase "Weeks Since First COVID-19 Case"

* Covariates whose balance is tested (one table row per variable).
global DESCVARS weeks_since_firstcase after_business after_k12 after_emergency after_lock after_mask cumulative_cases Trump_pct_2016 popestimate2019 share_female share_black share_hispanic share_asian_hawaii_pacific mean_age gdp_percapita_thousands unemployment_rate
* Presumably clears any outreg/frmttable tables left in Mata by an earlier run.
mata: mata clear
	
***Define "treatment" groups: Non-Teleworkable
* Split observations at the sample median of non_teleworkable_emp.
* Stored as double so the comparison below is not affected by float rounding.
quietly: sum non_teleworkable_emp, detail
gen double median_nontelework = r(p50)

* Stata treats missing as larger than any number, so without the
* !missing() guard observations with missing non_teleworkable_emp would be
* coded as "high". They are left missing instead.
gen high_nontelework = (non_teleworkable_emp >= median_nontelework) if !missing(non_teleworkable_emp)

**Stateweek ID for clustering:
* A numeric group ID is used rather than concatenating the state and week
* codes as strings: without a separator, e.g. state 1 / week 12 and
* state 11 / week 2 would both become "112" and be merged into one cluster.
* The -missing- option keeps observations with a missing week in the sample
* (grouped by state), as the string version did.
* NOTE(review): week() is the week of the year, so the same week number in
* different calendar years shares an ID -- confirm the data span one year.
encode state, gen(nstate)
gen week = week(start_date)
egen state_week = group(nstate week), missing

* Difference in means between the high and low non-teleworkable groups.
* Each covariate is regressed on the high_nontelework indicator with standard
* errors clustered by state-week; the coefficient is stored as a one-row
* table (row1, row2, ...) and appended to the running table "diff".
local rownum 0
foreach covariate of global DESCVARS {
    local ++rownum
    local rowlabel : variable label `covariate'

    regress `covariate' high_nontelework, vce(cluster state_week)

    outreg, keep(high_nontelework)  rtitle("`rowlabel'") stats(b) ///
        noautosumm store(row`rownum')  starlevels(10 5 1) starloc(1)
    outreg, replay(diff) append(row`rownum') ctitles("",Difference ) ///
        store(diff) note("")
}
* Display the assembled column of differences.
outreg, replay(diff)


* Then Summary statistics
* sumstat has one row per covariate and six columns:
*   1-3 = N, mean, sd for high_nontelework == 0
*   4-6 = N, mean, sd for high_nontelework == 1
local count: word count $DESCVARS
matrix sumstat = J(`count', 6, .)

local i = 1
foreach var of global DESCVARS {
    quietly summarize `var' if high_nontelework == 0
    matrix sumstat[`i', 1] = r(N)
    matrix sumstat[`i', 2] = r(mean)
    matrix sumstat[`i', 3] = r(sd)

    quietly summarize `var' if high_nontelework == 1
    matrix sumstat[`i', 4] = r(N)
    matrix sumstat[`i', 5] = r(mean)
    matrix sumstat[`i', 6] = r(sd)

    local ++i
}
* Store the matrix as a formatted table so it can be merged with "diff".
frmttable, statmat(sumstat) store(sumstat) sfmt(g,f,f,g,f,f)

* Output directory. Set the global CPS_OUTPUT_DIR before running this file to
* write the table elsewhere; the original author's location is the default.
if `"$CPS_OUTPUT_DIR"' == "" {
    global CPS_OUTPUT_DIR "/Users/alicezxu/Dropbox/Trump Polls/CPS Replication/Replication File/"
}
cd "$CPS_OUTPUT_DIR"

* Merge the summary statistics with the differences column and write the table.
* NOTE(review): the -tex- option is specified while the file name ends in
* .csv -- confirm the intended output format / extension.
outreg using "covidtrump_balancetest_telework.csv", ///
    replay(sumstat) merge(diff) tex nocenter note("") fragment plain replace ///
    ctitles("", Control, "", "", Treatment, "", "", "" \ "", n, mean, sd, n, mean, sd, Diff) ///
    multicol(1,2,3;1,5,3) 
